Wisconsin Prognosis

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast)

[++++]

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 7.13e-02 1.02 1.07 1.13 0.598 0.237
V26 7.18e-03 1.00 1.01 1.01 0.593 0.237
V27 3.33e-05 1.00 1.00 1.00 0.608 0.634
V35 7.70e-06 1.00 1.00 1.00 0.727 0.237
V34 6.19e-03 1.00 1.01 1.01 0.634 0.608
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.500 0.609 0.0619 0.437 2.87
V26 0.593 0.598 0.500 0.598 0.0626 0.393 2.77
V27 0.613 0.608 0.618 0.597 0.0497 0.387 2.56
V35 0.727 0.641 0.500 0.641 0.0289 0.565 2.28
V34 0.613 0.618 0.608 0.597 0.0254 0.441 2.19
  z.NRI Delta.AUC Frequency
V24 2.67 0.1091 1
V26 2.38 0.0983 1
V27 2.33 -0.0210 1
V35 3.50 0.1412 1
V34 2.66 -0.0116 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.430769 0.2630 0.155 1.50e-01 0.51357
RR 2.092437 2.2391 4.322 2.50e+01 2.14188
RR_LCI 1.241160 1.2771 0.635 5.22e-02 1.16605
RR_UCI 3.527581 3.9256 29.426 1.20e+04 3.93434
SEN 0.260870 0.6957 0.978 1.00e+00 0.15217
SPE 0.891892 0.5541 0.108 6.76e-02 0.94595
BACC 0.576381 0.6249 0.543 5.34e-01 0.54906
NetBenefit -0.000546 0.0436 0.107 1.12e-01 -0.00745
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.81 0.593 1.08 0.163
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.992 0.992 0.932 1.05
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.887 0.887 0.878 0.896
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.678 0.678 0.595 0.759
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.634 0.542 0.726
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.432
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.670372 on 1 degrees of freedom, p = 0.000635
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.11 1.23 11.7
class=1 27 12 4.89 10.33 11.7

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBreast,"status","time")

( 48.54674 , 146.2793 , 0.9494685 , 46 , 50.48553 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.298 0.924 48.5

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBreast$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Calibrated Train: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.40592 0.2457 0.144 1.39e-01 0.49831
RR 2.09244 2.2391 4.322 2.50e+01 2.49901
RR_LCI 1.24116 1.2771 0.635 5.22e-02 1.40627
RR_UCI 3.52758 3.9256 29.426 1.20e+04 4.44086
SEN 0.26087 0.6957 0.978 1.00e+00 0.15217
SPE 0.89189 0.5541 0.108 6.76e-02 0.95946
BACC 0.57638 0.6249 0.543 5.34e-01 0.55582
NetBenefit 0.00551 0.0541 0.118 1.22e-01 0.00537
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.833 0.609 1.11 0.252
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.964 1.08
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.921 0.921 0.911 0.931
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.678 0.677 0.596 0.754
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.634 0.542 0.726
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.407
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.670372 on 1 degrees of freedom, p = 0.000635
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.11 1.23 11.7
class=1 27 12 4.89 10.33 11.7

Cross-Validation

Here we use the estimated h0 and timeinterval from the full set

rcv <- randomCV(theData=dataBreast,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.9,
                repetitions=100,
                classSamplingType = "Pro"
         )

.[++++].[+++].[+++].[++++++].[+++++++].[++++].[+++].[+++++++].[++].[+++]10 Tested: 130 Avg. Selected: 5 Min Tests: 1 Max Tests: 4 Mean Tests: 1.538462 . MAD: 0.4838058

.[+++++].[+++++].[++++].[–].[++++++].[+++++].[++++].[++++++].[+++].[+++]20 Tested: 176 Avg. Selected: 4.55 Min Tests: 1 Max Tests: 5 Mean Tests: 2.272727 . MAD: 0.4675845

.[+].[+++++].[+++++++].[+++++].[+++++++].[+++++++].[+++].[++].[+].[+]30 Tested: 187 Avg. Selected: 4.433333 Min Tests: 1 Max Tests: 8 Mean Tests: 3.208556 . MAD: 0.4740649

.[+++++].[++++].[++++].[+++++].[+++++].[++++].[++++++].[–].[++++++++].[+++]40 Tested: 193 Avg. Selected: 4.475 Min Tests: 1 Max Tests: 10 Mean Tests: 4.145078 . MAD: 0.472692

.[++++].[++++].[++++++].[++].[++++++].[++++++].[++++++++].[+++].[++].[+++++++]50 Tested: 194 Avg. Selected: 4.66 Min Tests: 1 Max Tests: 11 Mean Tests: 5.154639 . MAD: 0.47836

.[+++].[+++].[+++].[-+++].[+++].[+++++++].[++++++].[++].[++++++++].[+++]60 Tested: 194 Avg. Selected: 4.683333 Min Tests: 1 Max Tests: 13 Mean Tests: 6.185567 . MAD: 0.4787073

.[++++].[+++++].[+].[-+].[++++++++].[+++++].[++].[+++].[+++++].[+++++]70 Tested: 194 Avg. Selected: 4.6 Min Tests: 2 Max Tests: 14 Mean Tests: 7.216495 . MAD: 0.4798368

.[++++].[–].[++++].[+++].[+++++].[+++++].[+++].[+++].[+++].[+++]80 Tested: 194 Avg. Selected: 4.4625 Min Tests: 2 Max Tests: 14 Mean Tests: 8.247423 . MAD: 0.4813706

.[++++++++].[++++++].[+].[+++++].[++++++].[++++++].[++++++].[+++++++].[+++++].[+++++++]90 Tested: 194 Avg. Selected: 4.666667 Min Tests: 2 Max Tests: 17 Mean Tests: 9.278351 . MAD: 0.4812631

.[+++++++].[–].[-+].[++++].[+++++].[++++].[+++].[++++].[++++++].[+++++]100 Tested: 194 Avg. Selected: 4.61 Min Tests: 3 Max Tests: 18 Mean Tests: 10.30928 . MAD: 0.4817839

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.407 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.40935 0.2409 0.155 1.45e-01 0.5037
RR 1.71811 2.1934 3.106 2.77e+01 1.5679
RR_LCI 0.97105 1.2511 0.804 5.75e-02 0.7388
RR_UCI 3.03990 3.8454 11.994 1.33e+04 3.3277
SEN 0.21739 0.6957 0.957 1.00e+00 0.1087
SPE 0.88514 0.5473 0.149 7.43e-02 0.9392
BACC 0.55126 0.6215 0.553 5.37e-01 0.5239
NetBenefit -0.00917 0.0554 0.108 1.18e-01 -0.0213
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.818 0.599 1.09 0.182
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.96 1.08
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.892 0.892 0.878 0.907
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.659 0.658 0.581 0.732
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.611 0.518 0.703
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.217 0.109 0.364
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.885 0.822 0.932
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.407
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 5.905766 on 1 degrees of freedom, p = 0.015091
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 36 41.07 0.626 5.91
class=1 27 10 4.93 5.222 5.91

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

( 48.81403 , 146.2793 , 0.9546962 , 46 , 51.35036 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.323 1 48.8
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

### Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.422 0.2409 0.155 1.45e-01 0.5037
RR 1.723 2.1934 3.106 2.77e+01 1.5679
RR_LCI 0.955 1.2511 0.804 5.75e-02 0.7388
RR_UCI 3.108 3.8454 11.994 1.33e+04 3.3277
SEN 0.196 0.6957 0.957 1.00e+00 0.1087
SPE 0.899 0.5473 0.149 7.43e-02 0.9392
BACC 0.547 0.6215 0.553 5.37e-01 0.5239
NetBenefit -0.010 0.0554 0.108 1.18e-01 -0.0213
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.822 0.602 1.1 0.204
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.958 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.894 0.894 0.879 0.909
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.659 0.658 0.575 0.737
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.611 0.518 0.703
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.174 0.0782 0.314
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.424
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 3.563064 on 1 degrees of freedom, p = 0.059079
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 171 38 41.71 0.33 3.56
class=1 23 8 4.29 3.21 3.56